################################################
# Analysis for:
#
# Warner, Zach, Garrett N. Vande Kamp, and Soren Jordan.
# ``Conditional Relationships in Dynamic Models.'' 
#  Political Science Research & Methods
#
# Blaydes and Kayser Supplemental Analysis (SM Section 7.1; Section 8.2)
#
# Required files: sm-bk-isq.dta
#                 interp_urdf.R (for interpretation)
#
################################################

# Set your working directory here

library(dplyr)
library(ggplot2)
library(grid)
library(haven)
library(lmtest)
library(dynamac)
library(urca)
library(tseries)
library(polywog)
library(MASS)
library(plyr)
library(caret)
library(foreign)

completeFun <- function(data, desiredCols) {
  completeVec <- complete.cases(data[, desiredCols])
  return(data[completeVec, ])
}

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  if (is.null(layout)) {
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots==1) {
    print(plots[[1]])
  } else {
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (i in 1:numPlots) {
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

r2.corr <- function(m) {
  lmfit <-  lm(model.response(model.frame(m)) ~ fitted(m))
  summary(lmfit)$r.squared
} 

# ur.df output is annoying. interp_urdf.R helper function for interpretation
#  Author: Hank Roark. 
#  https://gist.github.com/hankroark/968fc28b767f1e43b5a33b151b771bf9
source("interp_urdf.R")


##############################################################################################################################
# SM replication: Blaydes and Kayser
##############################################################################################################################
calories <- read.dta("sm-bk-isq.dta")

# create the lags that STATA does automatically
calories <- ddply(calories, .(country), transform, l.calstotal =
                    c(NA, calstotal[-length(calstotal)]))
calories <- ddply(calories, .(country), transform, l.gdpcxr100 =
                    c(NA, gdpcxr100[-length(gdpcxr100)]))
calories <- ddply(calories, .(country), transform, l.calsanimal =
                    c(NA, calsanimal[-length(calsanimal)]))
calories <- ddply(calories, .(country), transform, l.polity3cat =
                    c(NA, polity3cat[-length(polity3cat)]))
# and the change in polity3cat
calories <- ddply(calories, .(country), transform, d_polity3cat =
                    c(NA,diff(polity3cat,lag=1)))

### subset so there are no NAs, as STATA does, matching their 3,333 obs
calories <- completeFun(calories, c("d_calstotal","l.calstotal","d_calsanimal","l.calsanimal","l.gdpcxr100",
                                    "d_gdpcxr100","polity3cat","year","country","l.polity3cat","d_polity3cat"))

calories$d_polity3cat <- relevel(as.factor(calories$d_polity3cat), ref="0")
### data for replication already omits outliers:
# calories <- calories[which(calories$groutlier == 1),]

### replication ECM results
######### Table SM. 3; ``B&K model'' model #########
ecm <- glm(d_calstotal ~ l.calstotal + d_gdpcxr100 + l.gdpcxr100 + as.factor(polity3cat) + 
            d_gdpcxr100*as.factor(polity3cat)+as.factor(country), data=calories)
summary(ecm) #note estimates slightly different because we've dropped obs for model comparability
logLik(ecm)
BIC(ecm)
r2.corr(ecm)

# treat change in polity as numeric since it's [-2,2]
calories$d_polity3cat <- as.numeric(calories$d_polity3cat)

######### Table SM. 3; ``General'' model #########
ecmfull <- glm(d_calstotal ~ l.calstotal + d_gdpcxr100 + l.gdpcxr100 + d_polity3cat + as.factor(l.polity3cat) +
             d_gdpcxr100*as.factor(l.polity3cat) + l.gdpcxr100*d_polity3cat + d_gdpcxr100*d_polity3cat +
             l.gdpcxr100*as.factor(l.polity3cat) + as.factor(country), data=calories)
summary(ecmfull) #note estimates slightly different because we've dropped obs for model comparability
logLik(ecmfull)
BIC(ecmfull)
r2.corr(ecmfull)

# out of sample prediction
tc <- trainControl("cv",5)
train(d_calstotal ~ l.calstotal + d_gdpcxr100 + l.gdpcxr100 + as.factor(polity3cat) + 
        d_gdpcxr100*as.factor(polity3cat)+as.factor(country), data=calories,
      method="glm",trControl=tc)$results$RMSE
train(d_calstotal ~ l.calstotal + d_gdpcxr100 + l.gdpcxr100 + d_polity3cat + as.factor(l.polity3cat) +
        d_gdpcxr100*as.factor(l.polity3cat) + l.gdpcxr100*d_polity3cat + d_gdpcxr100*d_polity3cat +
        l.gdpcxr100*as.factor(l.polity3cat) + as.factor(country), data=calories,
      method="glm",trControl=tc)$results$RMSE

#######################################################################
##    B & K's key finding is that growth decreases inequality        ##
##    in hybrid regimes and democracies. We can study this by        ##
##    holding all regime types (i.e., not allowing regime change)    ## 
##    and varying the level of democracy, then studying growth's     ##
##    effects.                                                       ##
#######################################################################

pred_effects_by_regchange <- function(d.gdp = 1){
  thetas <- coef(ecmfull)
  varcov <- vcov(ecmfull)
  theta.tilde <- data.frame(mvrnorm(5000,thetas,varcov))
  theta.tilde <- theta.tilde[,c(2:7,119:124)] # all relevant variables. This order treats GDP as x and regime type as z
  colnames(theta.tilde) <- c("gamma_1","theta_0","theta_1","theta_2","theta_3h","theta_3d","theta_4h","theta_4d",
                             "theta_5","theta_6","theta_7h","theta_7d") # notation of Eq 4 in my paper

  # create storage for results
  effects <- data.frame(matrix(ncol=7,nrow=3))
  colnames(effects) <- c("inst.lo","inst.est","inst.hi","total.lo","total.est","total.hi","reg")
  effects$reg <- c("autoc","hybrid","democ")

  # SRE for each change in regime for each regime type
  theta.tilde$sre_autoc  <- theta.tilde$theta_0
  theta.tilde$sre_hybrid <- rowSums(theta.tilde[,c("theta_0","theta_4h")]) # Eq A1, where d.zt is 0.
  theta.tilde$sre_democ  <- rowSums(theta.tilde[,c("theta_0","theta_4d")]) # Eq A1, where d.zt is 0.
  
  effects$inst.lo[which(effects$reg == "autoc")] <- unname(quantile(theta.tilde$sre_autoc, .025))
  effects$inst.est[which(effects$reg == "autoc")] <- unname(summary(theta.tilde$sre_autoc)[4])
  effects$inst.hi[which(effects$reg == "autoc")] <- unname(quantile(theta.tilde$sre_autoc, .975))
  effects$inst.lo[which(effects$reg == "hybrid")] <- unname(quantile(theta.tilde$sre_hybrid, .025))
  effects$inst.est[which(effects$reg == "hybrid")] <- unname(summary(theta.tilde$sre_hybrid)[4])
  effects$inst.hi[which(effects$reg == "hybrid")] <- unname(quantile(theta.tilde$sre_hybrid, .975))
  effects$inst.lo[which(effects$reg == "democ")] <- unname(quantile(theta.tilde$sre_democ, .025))
  effects$inst.est[which(effects$reg == "democ")] <- unname(summary(theta.tilde$sre_democ)[4])
  effects$inst.hi[which(effects$reg == "democ")] <- unname(quantile(theta.tilde$sre_democ, .975))
  return(effects)
}

effects <- pred_effects_by_regchange()
effects$regcode <- c(0,1,2)

bk.short.run <- ggplot(effects) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,linetype="solid")) +
  geom_segment(aes(x=regcode,xend=regcode,y=inst.hi,yend=inst.lo), size=.8,show_guide=F) +
  geom_point(aes(x=regcode,y=inst.est),size=1.9,show_guide=F) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  labs(title="Instantaneous effects",x="",y=expression(paste(Delta, "Calories"))) +
  scale_x_continuous(breaks=c(0,1,2), labels=c("Autocracy","Hybrid","Democracy"),limits=c(-.5,2.5)) + 
  theme(axis.text.x = element_text(size=rel(1.2)), axis.text.y=element_text(size=rel(1.2))) # +
  # theme(text=element_text(family="minpro"))


######### Figure SM. 16 #########
pdf("sm-bk-sr.pdf",width=4.4,height=2.5)
plot(bk.short.run)
dev.off()


temp_effects_no_regchange <- function(d.gdp=1){
  thetas <- coef(ecmfull)
  varcov <- vcov(ecmfull)
  theta.tilde <- data.frame(mvrnorm(5000,thetas,varcov))
  theta.tilde <- theta.tilde[,c(2:7,119:124)] # all relevant variables. This order treats GDP as x and regime type as z
  colnames(theta.tilde) <- c("gamma_1","theta_0","theta_1","theta_2","theta_3h","theta_3d","theta_4h","theta_4d",
                             "theta_5","theta_6","theta_7h","theta_7d") # notation of Eq 4 in my paper
  
  # create storage for results
  temp.effects.autoc <- temp.effects.hybrid <- temp.effects.democ <- data.frame(matrix(nrow=nrow(theta.tilde),ncol=21))
  per.effects.autoc <- per.effects.hybrid <- per.effects.democ <- data.frame(matrix(nrow=nrow(theta.tilde),ncol=21))
  ### calculate quantities of interest
  # autocracy
  for(k in 1:21){
    for(j in 1:nrow(theta.tilde)){
      # subtracting 1 off of the exponents so that it loops into the right place, e.g. the instantaneous effect in row 1, column 1
      temp.effects.autoc[j,k] <- (
        (theta.tilde$theta_0[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k+1-1)) +
          (theta.tilde$theta_1[j] - theta.tilde$theta_0[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k-1))
      )/(-theta.tilde$gamma_1[j])
      per.effects.autoc[j,k] <- (((theta.tilde$gamma_1[j] + 1)^k)*(theta.tilde$theta_0[j]) +
          ((theta.tilde$gamma_1[j] + 1)^(k-1))*(theta.tilde$theta_1[j] - theta.tilde$theta_0[j]))
    } 
  }
  
  # hybrid
  for(k in 1:21){
    for(j in 1:nrow(theta.tilde)){
      # subtracting 1 off of the exponents so that it loops into the right place, e.g. the instantaneous effect in row 1, column 1
      temp.effects.hybrid[j,k] <- (
        (theta.tilde$theta_0[j] + theta.tilde$theta_4h[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k+1-1)) +
          (theta.tilde$theta_1[j] - theta.tilde$theta_0[j] - theta.tilde$theta_4h[j] + 
             theta.tilde$theta_7h[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k-1))
      )/(-theta.tilde$gamma_1[j])
      
      per.effects.hybrid[j,k] <- (((theta.tilde$gamma_1[j] + 1)^k)*(theta.tilde$theta_0[j] + theta.tilde$theta_4h[j]) +
                                   ((theta.tilde$gamma_1[j] + 1)^(k-1))*(theta.tilde$theta_1[j] - theta.tilde$theta_0[j] -
                                                                         theta.tilde$theta_4h[j] + theta.tilde$theta_7h[j]))
    } 
  }
  # democracy
  for(k in 1:21){
    for(j in 1:nrow(theta.tilde)){
      # subtracting 1 off of the exponents so that it loops into the right place, e.g. the instantaneous effect in row 1, column 1
      temp.effects.democ[j,k] <- (
        (theta.tilde$theta_0[j] + theta.tilde$theta_4d[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k+1-1)) +
          (theta.tilde$theta_1[j] - theta.tilde$theta_0[j] - theta.tilde$theta_4d[j] 
           + theta.tilde$theta_7d[j])*(1-(theta.tilde$gamma_1[j] + 1)^(k-1))
      )/(-theta.tilde$gamma_1[j])
      per.effects.democ[j,k] <- (((theta.tilde$gamma_1[j] + 1)^k)*(theta.tilde$theta_0[j] + theta.tilde$theta_4d[j]) +
                                   ((theta.tilde$gamma_1[j] + 1)^(k-1))*(theta.tilde$theta_1[j] - theta.tilde$theta_0[j] -
                                                                         theta.tilde$theta_4d[j] + theta.tilde$theta_7d[j]))
    } 
  }
  temp.effects.autoc.mean <- apply(temp.effects.autoc,2,mean)
  temp.effects.autoc <-apply(temp.effects.autoc,2,quantile,c(.025,.975))
  per.effects.autoc.mean <- apply(per.effects.autoc,2,mean)
  per.effects.autoc.mean <- c(NA,per.effects.autoc.mean[-length(per.effects.autoc.mean)])
  per.effects.autoc <-apply(per.effects.autoc,2,quantile,c(.025,.975))
  per.effects.autoc <- cbind(rep(NA,2),per.effects.autoc[,-ncol(per.effects.autoc)])
  temp.effects.autoc.df <- data.frame(year=0:(ncol(temp.effects.autoc)-1),t(rbind(temp.effects.autoc,temp.effects.autoc.mean)),
                                      t(rbind(per.effects.autoc,per.effects.autoc.mean)))
  colnames(temp.effects.autoc.df) <- c("year","lower","upper","mean","per.lo","per.up","per.mean")
  temp.effects.hybrid.mean <- apply(temp.effects.hybrid,2,mean)
  temp.effects.hybrid <-apply(temp.effects.hybrid,2,quantile,c(.025,.975))
  per.effects.hybrid.mean <- apply(per.effects.hybrid,2,mean)
  per.effects.hybrid.mean <- c(NA,per.effects.hybrid.mean[-length(per.effects.hybrid.mean)])
  per.effects.hybrid <-apply(per.effects.hybrid,2,quantile,c(.025,.975))
  per.effects.hybrid <- cbind(rep(NA,2),per.effects.hybrid[,-ncol(per.effects.hybrid)])
  temp.effects.hybrid.df <- data.frame(year=0:(ncol(temp.effects.hybrid)-1),t(rbind(temp.effects.hybrid,temp.effects.hybrid.mean)),
                                       t(rbind(per.effects.hybrid,per.effects.hybrid.mean)))
  colnames(temp.effects.hybrid.df) <- c("year","lower","upper","mean","per.lo","per.up","per.mean")
  temp.effects.democ.mean <- apply(temp.effects.democ,2,mean)
  temp.effects.democ <-apply(temp.effects.democ,2,quantile,c(.025,.975))
  per.effects.democ.mean <- apply(per.effects.democ,2,mean)
  per.effects.democ.mean <- c(NA,per.effects.democ.mean[-length(per.effects.democ.mean)])
  per.effects.democ <-apply(per.effects.democ,2,quantile,c(.025,.975))
  per.effects.democ <- cbind(rep(NA,2),per.effects.democ[,-ncol(per.effects.democ)])
  temp.effects.democ.df <- data.frame(year=0:(ncol(temp.effects.democ)-1),t(rbind(temp.effects.democ,temp.effects.democ.mean)),
                                      t(rbind(per.effects.democ,per.effects.democ.mean)))
  colnames(temp.effects.democ.df) <- c("year","lower","upper","mean","per.lo","per.up","per.mean")
  
  #store inst effects
  theta.tilde$sre.autocracy <- theta.tilde$sre.hybrid <- theta.tilde$sre.democracy <- rep(NA,nrow(theta.tilde)) 
  for(j in 1:nrow(theta.tilde)){
    # subtracting 1 off of the exponents so that it loops into the right place, e.g. the instantaneous effect in row 1, column 1
    theta.tilde$sre.autocracy[j] <- (theta.tilde$theta_0[j])
    theta.tilde$sre.hybrid[j]    <- (theta.tilde$theta_0[j] + theta.tilde$theta_4h[j])
    theta.tilde$sre.democracy[j] <- (theta.tilde$theta_0[j] + theta.tilde$theta_4d[j])
  } 
  
  sres.mean <- apply(theta.tilde[,c("sre.autocracy","sre.hybrid","sre.democracy")],2,mean)
  sres.quants <- apply(theta.tilde[,c("sre.autocracy","sre.hybrid","sre.democracy")],2,quantile,c(.025,.975))
  sres <- data.frame(t(rbind(sres.quants,sres.mean)))
  colnames(sres) <- c("lower","upper","mean")
  temp.effects.autoc.df[1,c(5:7)] <- sres[1,]
  temp.effects.hybrid.df[1,c(5:7)] <- sres[2,]
  temp.effects.democ.df[1,c(5:7)] <- sres[3,]
  
  # store lres
  theta.tilde$lre.autocracy <- theta.tilde$lre.hybrid <- theta.tilde$lre.democracy <- rep(NA,nrow(theta.tilde)) 
  for(j in 1:nrow(theta.tilde)){
    # subtracting 1 off of the exponents so that it loops into the right place, e.g. the instantaneous effect in row 1, column 1
    theta.tilde$lre.autocracy[j] <- (theta.tilde$theta_1[j])/(-theta.tilde$gamma_1[j])
    theta.tilde$lre.hybrid[j]    <- (theta.tilde$theta_1[j] + theta.tilde$theta_7h[j])/(-theta.tilde$gamma_1[j])
    theta.tilde$lre.democracy[j] <- (theta.tilde$theta_1[j] + theta.tilde$theta_7d[j])/(-theta.tilde$gamma_1[j])
  } 
  
  lres.mean <- apply(theta.tilde[,c("lre.autocracy","lre.hybrid","lre.democracy")],2,mean)
  lres.quants <- apply(theta.tilde[,c("lre.autocracy","lre.hybrid","lre.democracy")],2,quantile,c(.025,.975))
  lres <- data.frame(t(rbind(lres.quants,lres.mean)))
  colnames(lres) <- c("lower","upper","mean")
  
  
  dfs <- as.list(NULL)
  dfs[[1]] <- temp.effects.autoc.df
  dfs[[2]] <- temp.effects.hybrid.df
  dfs[[3]] <- temp.effects.democ.df
  dfs[[4]] <- sres
  dfs[[5]] <- lres
  names(dfs) <- c("autocracy","hybrid","democracy","sres","lres")
  return(dfs)
}

dfs <- temp_effects_no_regchange()

### Period-specific effects
# coverage
dfs[[1]]$cov <- ifelse(dfs[[1]]$per.up > 0 & dfs[[1]]$per.lo < 0,0,1)
dfs[[2]]$cov <- ifelse(dfs[[2]]$per.up > 0 & dfs[[2]]$per.lo < 0,0,1)
dfs[[3]]$cov <- ifelse(dfs[[3]]$per.up > 0 & dfs[[3]]$per.lo < 0,0,1)
# remove the SREs just for visual attractiveness; already plotted in earlier figure anyway
autoc <- dfs[[1]][-1,]
hybrid <- dfs[[2]][-1,]
democ <- dfs[[3]][-1,]

fig5.1 <- ggplot(autoc, aes(x=year,y=per.mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  theme(axis.text.y = element_text(margin = margin(r = 7))) +
  geom_segment(aes(x=year,xend=year,y=per.lo,yend=per.up),size=.5) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_point(show_guide=F,shape=21,aes(x=year,y=per.mean,fill=factor(cov)),size=1.5) + 
  scale_fill_manual(values=c("white", "black")) +
  scale_colour_manual(values=c("black", "black")) +
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20),labels=c(0,5,10,15,20)) +
  coord_cartesian(ylim=c(-2.5, 1.5)) +
  labs(title="Autocracies",x="Year",y=expression(paste(Delta, " Calories"))) + 
  theme(axis.title.x=element_text(color="white")) #+
  #theme(text=element_text(family="minpro"))

fig5.2 <- ggplot(hybrid, aes(x=year,y=per.mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  theme(axis.text.y = element_text(margin = margin(r = 7))) +
  geom_segment(aes(x=year,xend=year,y=per.lo,yend=per.up),size=.5) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_point(show_guide=F,shape=21,aes(x=year,y=per.mean,fill=factor(cov)),size=1.5) + 
  scale_fill_manual(values=c("black", "white")) +
  scale_colour_manual(values=c("black", "black")) +
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20),labels=c(0,5,10,15,20)) +
  coord_cartesian(ylim=c(-2.5, 1.5)) +
  labs(title="Hybrid regimes",x="Year",y="") + 
  theme(axis.title.x=element_text(color="white")) #+
  #theme(text=element_text(family="minpro"))

fig5.3 <- ggplot(democ, aes(x=year,y=per.mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  theme(axis.text.y = element_text(margin = margin(r = 7))) +
  geom_segment(aes(x=year,xend=year,y=per.lo,yend=per.up),size=.5) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_point(show_guide=F,shape=21,aes(x=year,y=per.mean,fill=factor(cov)),size=1.5) + 
  scale_fill_manual(values=c("white", "black")) +
  scale_colour_manual(values=c("black", "black")) +
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20),labels=c(0,5,10,15,20)) +
  coord_cartesian(ylim=c(-2.5, 1.5)) +
  labs(title="Democracies",x="Year",y="") +
  theme(axis.title.x=element_text(color="white")) #+
  #theme(text=element_text(family="minpro"))

### cumulative effects
fig5.4 <- ggplot(dfs[[1]], aes(x=year,y=mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25) +
  geom_line(aes(x=year,y=mean)) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_segment(data=dfs[[5]]["lre.autocracy",],aes(x=23,xend=23,y=lower,yend=upper),size=.75) +
  geom_point(data=dfs[[5]]["lre.autocracy",],aes(x=23,y=mean),size=2,shape=21,fill="white") + 
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20,23),labels=c(0,5,10,15,20,expression(infinity))) +
  coord_cartesian(ylim=c(-13, 23)) +
  labs(title="Autocracies",x="Year",y=expression(paste(Delta, "Calories, cumulative"))) #+ 
  #theme(text=element_text(family="minpro"))

fig5.5 <- ggplot(dfs[[2]], aes(x=year,y=mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25) +
  geom_line(aes(x=year,y=mean)) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_segment(data=dfs[[5]]["lre.hybrid",],aes(x=23,xend=23,y=lower,yend=upper),size=.75) +
  geom_point(data=dfs[[5]]["lre.hybrid",],aes(x=23,y=mean),size=2,shape=21,fill="white") + 
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20,23),labels=c(0,5,10,15,20,expression(infinity))) +
  coord_cartesian(ylim=c(-13, 23)) +
  labs(title="Hybrid regimes",x="Year",y="") #+ 
  #theme(text=element_text(family="minpro"))

fig5.6 <- ggplot(dfs[[3]], aes(x=year,y=mean)) + 
  theme_bw() + 
  theme(panel.grid.minor = element_blank(),panel.grid.major=element_blank(),
        panel.border = element_rect(fill=NA,size=1,linetype="solid"),
        plot.margin = unit(c(.1,.1,.1,.1), "cm")) +
  geom_ribbon(aes(ymin=lower, ymax=upper), alpha=.25) +
  geom_line(aes(x=year,y=mean)) +
  geom_hline(aes(yintercept=0),linetype="dashed",alpha=.7) +
  geom_segment(data=dfs[[5]]["lre.democracy",],aes(x=23,xend=23,y=lower,yend=upper),size=.75) +
  geom_point(data=dfs[[5]]["lre.democracy",],aes(x=23,y=mean),size=2,shape=21,fill="white") + 
  scale_x_continuous(limits=c(0,24),breaks=c(0,5,10,15,20,23),labels=c(0,5,10,15,20,expression(infinity))) +
  coord_cartesian(ylim=c(-13, 23)) +
  labs(title="Democracies",x="Year",y="") #+
  #theme(text=element_text(family="minpro"))


######### Figure SM. 17 #########
pdf("sm-bk-lr.pdf",width=7,height=5)
multiplot(fig5.1,fig5.4,fig5.2,fig5.5,fig5.3,fig5.6,cols=3)
dev.off()


### Threshold effects (unreported in SM)
# these plots suggest the instant effect diminishes over time. When does it hit 0 for hybrid and democracies?
dfs[[2]][,c(1:2)] # lower bound turns negative in t=8 for hybrids
dfs[[3]][,c(1:2)] # lower bound turns negative in t=12 for democracies












##### DIAGNOSING TIME SERIES PROPERTIES #####
# At the end of each for loop is the mean and % that consitutes Table SM. 7

intdata <- completeFun(calories,c("calstotal","country","year"))
countries <- unique(intdata$country)
# test calstotal for nonstationarity
ps <- as.vector(NULL)
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$calstotal)))
  ps <- c(ps,adf.test(tmp$calstotal)$p.value)
} 
mean(ps)
length(ps[which(ps < .05)])/length(ps)
# looks like a unit root is a good start

intdata <- completeFun(calories,c("d_calstotal","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
# test the first difference for nonstationarity
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$d_calstotal)))
  ps <- c(ps,adf.test(tmp$d_calstotal)$p.value)
} 
mean(ps)
length(ps[which(ps < .05)])/length(ps)



# looks much better, but might still be under-differenced. let's check the ACFs/PACFs again
# This is just illustrative; none of the below is reported in the SM

for(i in 1:length(countries)){
  # ACFs
  tmp <- intdata[intdata$country==countries[i],]
  eval(parse(text=paste("tmpdata <- Acf(tmp$d_calstotal, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, acf=tmpdata$acf)
  tmpdata$min <- tmpdata$max <- rep(NA,nrow(tmpdata))
  tmpdata$max[1] <- 1/sqrt(nrow(tmp))
  for(j in 2:nrow(tmpdata)){
    tmpdata$max[j] <- (sqrt((1/nrow(tmp))*(1+(2*sum((tmpdata$max[c(1:tmpdata$lag[j])])^2)))))
  }
  tmpdata$max <- qnorm(.975)*tmpdata$max
  tmpdata$min <- -1*(tmpdata$max)
  filename <- paste("./diagnostic plots/bk-acf-calstotal-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, acf)) + 
          theme_bw() + geom_ribbon(aes(ymin=min, ymax=max),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("ACF: ",countries[i],sep=""),x="Lag",y="Autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
  # PACFs
  eval(parse(text=paste("tmpdata <- Pacf(tmp$d_calstotal, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, pcf=tmpdata$acf)
  filename <- paste("./diagnostic plots/bk-pacf-calstotal-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, pcf)) + 
          theme_bw() + geom_ribbon(aes(ymin=(qnorm(.975)/sqrt(nrow(tmp))), ymax=(-qnorm(.975)/sqrt(nrow(tmp)))),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("PACF: ",countries[i],sep=""),x="Lag",y="Partial autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
}  # looks like calories is a unit root process


# test GDP for nonstationarity
intdata <- completeFun(calories,c("gdpcxr100","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$gdpcxr100)))
  ps <- c(ps,adf.test(tmp$gdpcxr100)$p.value)
} 
mean(ps)
length(ps[which(ps < .05)])/length(ps)  # same

# test change in GDP
intdata <- completeFun(calories,c("d_gdpcxr100","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
# test the first difference for nonstationarity
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$d_gdpcxr100)))
  ps <- c(ps,adf.test(tmp$d_gdpcxr100)$p.value)
} 
mean(ps)
length(ps[which(ps < .05)])/length(ps)


# looks much better, but might still be under-differenced. let's check the ACFs/PACFs again
# This is just illustrative; none of the below is reported in the SM


for(i in 1:length(countries)){
  # ACFs
  tmp <- intdata[intdata$country==countries[i],]
  eval(parse(text=paste("tmpdata <- Acf(tmp$d_gdpcxr100, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, acf=tmpdata$acf)
  tmpdata$min <- tmpdata$max <- rep(NA,nrow(tmpdata))
  tmpdata$max[1] <- 1/sqrt(nrow(tmp))
  for(j in 2:nrow(tmpdata)){
    tmpdata$max[j] <- (sqrt((1/nrow(tmp))*(1+(2*sum((tmpdata$max[c(1:tmpdata$lag[j])])^2)))))
  }
  tmpdata$max <- qnorm(.975)*tmpdata$max
  tmpdata$min <- -1*(tmpdata$max)
  filename <- paste("./diagnostic plots/bk-acf-gdpcxr100-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, acf)) + 
          theme_bw() + geom_ribbon(aes(ymin=min, ymax=max),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("ACF: ",countries[i],sep=""),x="Lag",y="Autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
  # PACFs
  eval(parse(text=paste("tmpdata <- Pacf(tmp$d_gdpcxr100, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, pcf=tmpdata$acf)
  filename <- paste("./diagnostic plots/bk-pacf-gdpcxr100-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, pcf)) + 
          theme_bw() + geom_ribbon(aes(ymin=(qnorm(.975)/sqrt(nrow(tmp))), ymax=(-qnorm(.975)/sqrt(nrow(tmp)))),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("PACF: ",countries[i],sep=""),x="Lag",y="Partial autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
}  # looks like gdp is a unit root process, which we'd expect, especially given the results elsewhere.




# test regime type
intdata <- completeFun(calories,c("polity3cat","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(as.factor(tmp$polity3cat))))
  ps <- c(ps,adf.test(as.factor(tmp$polity3cat))$p.value)
} 
mean(!is.na(ps))
length(ps[which(ps < .05)])/length(ps)

# test change in regime type
intdata <- completeFun(calories,c("d_polity3cat","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
# test the first difference for nonstationarity
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$d_polity3cat)))
  ps <- c(ps,adf.test(as.factor(tmp$d_polity3cat))$p.value)
} 
mean(!is.na(ps))
length(ps[which(ps < .05)])/length(ps)



# looks much better, but might still be under-differenced. let's check the ACFs/PACFs again
# This is just illustrative; none of the below is reported in the SM


for(i in 1:length(countries)){
  # ACFs
  tmp <- intdata[intdata$country==countries[i],]
  eval(parse(text=paste("tmpdata <- Acf(tmp$d_polity3cat, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, acf=tmpdata$acf)
  tmpdata$min <- tmpdata$max <- rep(NA,nrow(tmpdata))
  tmpdata$max[1] <- 1/sqrt(nrow(tmp))
  for(j in 2:nrow(tmpdata)){
    tmpdata$max[j] <- (sqrt((1/nrow(tmp))*(1+(2*sum((tmpdata$max[c(1:tmpdata$lag[j])])^2)))))
  }
  tmpdata$max <- qnorm(.975)*tmpdata$max
  tmpdata$min <- -1*(tmpdata$max)
  filename <- paste("./diagnostic plots/bk-acf-polity3cat-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, acf)) + 
          theme_bw() + geom_ribbon(aes(ymin=min, ymax=max),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("ACF: ",countries[i],sep=""),x="Lag",y="Autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
  # PACFs
  eval(parse(text=paste("tmpdata <- Pacf(tmp$d_polity3cat, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, pcf=tmpdata$acf)
  filename <- paste("./diagnostic plots/bk-pacf-polity3cat-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, pcf)) + 
          theme_bw() + geom_ribbon(aes(ymin=(qnorm(.975)/sqrt(nrow(tmp))), ymax=(-qnorm(.975)/sqrt(nrow(tmp)))),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("PACF: ",countries[i],sep=""),x="Lag",y="Partial autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
}  # looks like gdp is a unit root process 


# finally, check the interaction term
calories$interaction <- calories$gdpcxr100*calories$polity3cat  # have to treat polity3cat as numeric...
calories <- ddply(calories, .(country), transform, d_interaction =
                    c(NA,diff(interaction,lag=1)))
intdata <- completeFun(calories,c("interaction","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
# test the interaction for nonstationarity
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$interaction)))
  ps <- c(ps,adf.test(tmp$interaction)$p.value)
} 
mean(!is.na(ps))
length(ps[which(ps < .05)])/length(ps)
# looks like a unit root is a good start

intdata <- completeFun(calories,c("d_interaction","country","year"))
countries <- unique(intdata$country)
ps <- as.vector(NULL)
# test the first difference for nonstationarity
for(i in 1:length(countries)){
  tmp <- intdata[intdata$country==countries[i],]
  print(countries[i])
  print(try(adf.test(tmp$d_interaction)))
  ps <- c(ps,adf.test(tmp$d_interaction)$p.value)
} 
mean(!is.na(ps))
length(ps[which(ps < .05)])/length(ps)


# looks much better, but might still be under-differenced. let's check the ACFs/PACFs again
# This is just illustrative; none of the below is reported in the SM


for(i in 1:length(countries)){
  # ACFs
  tmp <- intdata[intdata$country==countries[i],]
  eval(parse(text=paste("tmpdata <- Acf(tmp$d_interaction, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, acf=tmpdata$acf)
  tmpdata$min <- tmpdata$max <- rep(NA,nrow(tmpdata))
  tmpdata$max[1] <- 1/sqrt(nrow(tmp))
  for(j in 2:nrow(tmpdata)){
    tmpdata$max[j] <- (sqrt((1/nrow(tmp))*(1+(2*sum((tmpdata$max[c(1:tmpdata$lag[j])])^2)))))
  }
  tmpdata$max <- qnorm(.975)*tmpdata$max
  tmpdata$min <- -1*(tmpdata$max)
  filename <- paste("./diagnostic plots/bk-acf-",countries[i],".pdf",sep="")
  # pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, acf)) + 
          theme_bw() + geom_ribbon(aes(ymin=min, ymax=max),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("ACF: ",countries[i],sep=""),x="Lag",y="Autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
  # PACFs
  eval(parse(text=paste("tmpdata <- Pacf(tmp$d_interaction, lag.max=25, na.action = na.pass, main='",countries[i],"', plot=F)",sep="")))
  tmpdata <- data.frame(lag=tmpdata$lag, pcf=tmpdata$acf)
  filename <- paste("./diagnostic plots/bk-pacf-",countries[i],".pdf",sep="")
 #  pdf(filename,width=6,height=3.25)
  showtext.begin()
  print(ggplot(tmpdata, aes(lag, pcf)) + 
          theme_bw() + geom_ribbon(aes(ymin=(qnorm(.975)/sqrt(nrow(tmp))), ymax=(-qnorm(.975)/sqrt(nrow(tmp)))),fill="grey70") +
          geom_lollipop(point.colour="darkblue", colour="darkblue") +
          coord_cartesian(ylim=c(-1,1),xlim=c(0,max(tmpdata$lag))) +
          labs(title=paste("PACF: ",countries[i],sep=""),x="Lag",y="Partial autocorrelations of the interaction term") +
          geom_hline(aes(yintercept=0)) +
          theme(text=element_text(family="minpro")))
  showtext.end()
  dev.off()
} # Again it's a bit trickier, but it looks to me like another difference would be too much. 





######### Table SM. 8 #########
#... and now let's confirm that everything is cointegrated.
# looking at ECM residuals (i.e. 1-step procedure)
ecmfull <- glm(d_calstotal ~ l.calstotal + d_gdpcxr100 + l.gdpcxr100 + d_polity3cat + as.factor(l.polity3cat) +
                 d_gdpcxr100*as.factor(l.polity3cat) + l.gdpcxr100*d_polity3cat + d_gdpcxr100*d_polity3cat +
                 l.gdpcxr100*as.factor(l.polity3cat) + as.factor(country), data=calories)
Box.test(resid(ecmfull),lag=20,type="Ljung-Box") # reject null -- could be a problem
adf.test(resid(ecmfull)) # reject null of non-stationarity, so evidence residuals are stationary (i.e., cointegration)
kpss.test(resid(ecmfull)) # do not reject null of no unit root, so evidence residuals are stationary (i.e., cointegration)

library(urca)
# Engle-Granger 2-step test
calories.eg <- completeFun(calories, c("calstotal","gdpcxr100","polity3cat","interaction"))
m1 <- lm(calstotal ~ gdpcxr100 + as.factor(polity3cat) + interaction, data = calories.eg)
residuals <- resid(m1)
res.ADF<-ur.df(residuals,type="none",selectlags="AIC")
summary(res.ADF) # reject null, so stationary, and cointegration.



######### Table SM. 9 #########
# A formal Johansen test
dat <- data.frame(cbind(calories$calstotal, calories$gdpcxr100, calories$polity3cat, calories$interaction))
ecmtest <- ca.jo(dat,type="trace",ecdet="const")
summary(ecmtest) # evidence of cointegrating vectors. 
summary(ca.jo(dat,type="trace",ecdet="trend")) # either way, still cointegrated

# None of these are perfect, but I think a bunch of unit roots and cointegration is a reasonable conclusion. 
# Ideally we'd have a balanced panel to do panel unit root and panel cointegration tests, and ideally we'd
# have really long time series so we could use Fractional Integration methods.

rm(list=ls()[!(ls() %in% c("cl","completeFun","multiplot","r2.corr"))])









